package com.six.compress.old;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;

import java.util.Random;

/**
 * ORC write benchmark: 10,000 rows of 1 long "time" column plus 28,000 float
 * columns, flushed to the writer in 100-row batches.
 *
 * Measured results from previous runs (10000 rows x 28001 columns):
 *   NONE:   total ~104,573-104,811 ms, ~10 ms/row, file ~1.1 GB (source ~1068 MB)
 *   ZLIB:   total ~107,261-107,712 ms, ~10 ms/row, file ~31.6 MB
 *   SNAPPY: total ~103,791-104,201 ms, ~10 ms/row, file ~79 MB
 */
@SuppressWarnings("all")
public class ORCWriterFloatAllTest {

    /** Number of float columns in the schema (fields p1 .. p28000). */
    private static final int FLOAT_COLUMNS = 28000;
    /** Total number of rows to write. */
    private static final int ROW_COUNT = 10000;
    /** Rows accumulated before each addRowBatch() flush. */
    private static final int BATCH_ROWS = 100;

    public static void main(String[] args) throws Exception {

        // Define the ORC schema (i.e. the table structure): one long "time"
        // column followed by FLOAT_COLUMNS float columns named p1..pN.
        TypeDescription schema = TypeDescription.createStruct();
        schema.addField("time", TypeDescription.createLong());
        for (int i = 1; i <= FLOAT_COLUMNS; i++) {
            schema.addField("p" + i, TypeDescription.createFloat());
        }

        Writer writer = null;
        try {
            // Absolute local path of the output ORC file.
            String lxw_orc1_file = "/home/hdfs/data/fly_param_float_2w8q_none.orc";
            Configuration conf = new Configuration();
            FileSystem.getLocal(conf); // ensure the local filesystem is initialized
            writer = OrcFile.createWriter(
                    new Path(lxw_orc1_file),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema)
//                            .stripeSize(1024)
//                            .bufferSize(1024)
//                            .blockSize(1024)
                            .compress(CompressionKind.NONE)
                            .version(OrcFile.Version.V_0_12)
            );

            VectorizedRowBatch batch = schema.createRowBatch(ROW_COUNT);
            long start = System.currentTimeMillis();
            Random random = new Random(10000); // fixed seed for reproducible data
            for (int i = 0; i < ROW_COUNT; i++) {
                // BUG FIX: the original used ColumnVector.fill(), which overwrites
                // EVERY slot of the column vector, so all rows of a batch ended up
                // holding the values of the last generated row. Write one row at a
                // time into vector[row] instead.
                int row = batch.size++;
                ((LongColumnVector) batch.cols[0]).vector[row] = start + i;
                for (int j = 1; j <= FLOAT_COLUMNS; j++) {
                    ((DoubleColumnVector) batch.cols[j]).vector[row] = random.nextFloat();
                }
                if ((i + 1) % BATCH_ROWS == 0) {
                    writer.addRowBatch(batch);
                    batch.reset();
                    long end = System.currentTimeMillis();
                    // BUG FIX: average over the rows written so far, (i + 1),
                    // not i (off-by-one in the original).
                    System.out.println("当前:第=>" + i + "条" + " cost=>" + (end - start)
                            + "  avg=>" + (end - start) / (i + 1) + "ms/条");
                }
            }
            // BUG FIX: the original unconditionally added the (just reset, empty)
            // batch here; only flush if rows are actually pending.
            if (batch.size > 0) {
                writer.addRowBatch(batch);
            }
            writer.close();
            writer = null; // closed successfully; prevents double-close in finally
            long e = System.currentTimeMillis();
            System.out.println("cost=>" + (e - start) + "  avg=>" + (e - start) / ROW_COUNT + "ms/条");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // BUG FIX: the original leaked the writer (and its output stream) when
            // any exception fired before writer.close(); always release it here.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best-effort cleanup; the primary failure was already reported
                }
            }
        }
    }

}
